##########################
#TB DIAGNOSTICS EVALUATION
##########################

rm(list=ls())

setwd('C:\\Users\\HP\\OneDrive\\Desktop\\Eunita\\MPH project\\STATA\\Open bugs data\\TB DATA')

if(!R.Version()$arch=='i386'){message('Switch to 32 bit R');q()}

if(!'BRugs' %in% row.names(installed.packages())){install.packages('BRugs')}; require('BRugs')

##############################################################################################
###############
#Some functions
###############

median.range <- function(x){
  
  y <-c(summary(x)[3],summary(x)[1],summary(x)[6])
  
  y
  
}

#################################################

tb.dat <- read.csv('TB_data.csv',header=T)

str(tb.dat)

(1:190)[which(!1:190 %in% tb.dat$SERIAL_NUM)] #serial no 11 missing

#########################################################################
#DESCRIPTIVE STATS
###########################################################################

median.range(tb.dat$AGE)

median.range(tb.dat$WEIGHT)

nams <- colnames(tb.dat)[c(3:7,9)]; prop.list <- list(); 

for(i in 1:length(nams)){ 
  
  prop.list[[i]] <- round(prop.table(table(tb.dat[,nams[i]]))*100,1)
  
  names(prop.list)[i] <- nams[i]
  
}

prop.list #percentages of categorical vars

###############################################################################
#BAYESIAN CODE
##############

n.tests <- 3; n.pop <- 1

model.tb <- function(){
  
  for(i in 1:3){ #priors test 1: LAM test 2: X.PERT test 3: SPUTUM
    
    se[i] ~ dbeta(1,1)
    sp[i] ~ dbeta(1,1)
    
  }
  
  p ~ dbeta(1,1)
  
  pop[1,1:8] ~ dmulti(par[1,1:8],n)
  
  par[1,1] <- se[1]*se[2]*se[3]*p + (1-sp[1])*(1-sp[2])*(1-sp[3])*(1-p)
  par[1,2] <- se[1]*se[2]*(1-se[3])*p + (1-sp[1])*(1-sp[2])*sp[3]*(1-p)
  par[1,3] <- se[1]*(1-se[2])*se[3]*p + (1-sp[1])*sp[2]*(1-sp[3])*(1-p)
  par[1,4] <- (1-se[1])*se[2]*se[3]*p + sp[1]*(1-sp[2])*(1-sp[3])*(1-p)
  par[1,5] <- se[1]*(1-se[2])*(1-se[3])*p + (1-sp[1])*sp[2]*sp[3]*(1-p)
  par[1,6] <- (1-se[1])*se[2]*(1-se[3])*p + sp[1]*(1-sp[2])*sp[3]*(1-p)
  par[1,7] <- (1-se[1])*(1-se[2])*se[3]*p + sp[1]*sp[2]*(1-sp[3])*(1-p)
  par[1,8] <- (1-se[1])*(1-se[2])*(1-se[3])*p + sp[1]*sp[2]*sp[3]*(1-p)
  
  n <- sum(pop[1,1:8])
  
  for(i in 1:3){ #Calculate ppv and npv and Youden's index estimates
    
    ppv[i] <- se[i]*p/(se[i]*p + (1-sp[i])*(1-p))
    
    npv[i] <- sp[i]*(1-p)/(sp[i]*(1-p) +  (1-se[i])*p)
    
    y.index[i] <- (se[i] + sp[i]) - 1
    
  }
  
  #Test whether the Se and Sp of the 3 tests differ
  p.se[1] <- step(se[1] - se[2])
  p.se[2] <- step(se[1] - se[3])
  p.se[3] <- step(se[2] - se[3])
  
  p.sp[1] <- step(sp[1] - sp[2])
  p.sp[2] <- step(sp[1] - sp[3])
  p.sp[3] <- step(sp[2] - sp[3])
  
  
}


##################################################################################################
#Data
#######

tb.mat <- matrix(NA,nrow=n.pop,ncol=2^n.tests)

tb.mat[1,1] <- nrow(tb.dat[tb.dat$LAM==1 & tb.dat$X.PERT==1 & tb.dat$SPUTUM==1 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])
tb.mat[1,2] <- nrow(tb.dat[tb.dat$LAM==1 & tb.dat$X.PERT==1 & tb.dat$SPUTUM==0 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])
tb.mat[1,3] <- nrow(tb.dat[tb.dat$LAM==1 & tb.dat$X.PERT==0 & tb.dat$SPUTUM==1 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])
tb.mat[1,4] <- nrow(tb.dat[tb.dat$LAM==0 & tb.dat$X.PERT==1 & tb.dat$SPUTUM==1 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])
tb.mat[1,5] <- nrow(tb.dat[tb.dat$LAM==1 & tb.dat$X.PERT==0 & tb.dat$SPUTUM==0 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])
tb.mat[1,6] <- nrow(tb.dat[tb.dat$LAM==0 & tb.dat$X.PERT==1 & tb.dat$SPUTUM==0 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])
tb.mat[1,7] <- nrow(tb.dat[tb.dat$LAM==0 & tb.dat$X.PERT==0 & tb.dat$SPUTUM==1 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])
tb.mat[1,8] <- nrow(tb.dat[tb.dat$LAM==0 & tb.dat$X.PERT==0 & tb.dat$SPUTUM==0 & complete.cases(tb.dat[,c('LAM','X.PERT','SPUTUM')]),])

tb.mat.list <- list(tb.mat); names(tb.mat.list) <- 'pop'

####################################################################################################################################################################################
#Convert all to BUGS format

#write model to a file
writeModel(model.tb,'model.tb.txt')

#Bugs data
bugsData(tb.mat.list,fileName='tb.mat.list.txt')

#make 2 initial values chains 
bugsInits(inits=list(list(se=rep(0.90,times=3),sp=rep(0.95,times=3),p=0.10)),numChains=1,'CID.Init1.txt')
bugsInits(inits=list(list(se=rep(0.95,times=3),sp=rep(0.90,times=3),p=0.05)),numChains=1,'CID.Init2.txt')

#now check, load data, compile etc.
modelCheck('model.tb.txt') #check model file.
modelData('tb.mat.list.txt') #read data file
modelCompile(numChains=2) #compile model with 2 chains
modelInits('CID.Init1.txt',1) #read init data file
modelInits('CID.Init2.txt',2) #read init data file
#modelGenInits() #generate the missing initial values

modelUpdate(20000) #burn in

samplesSet(c('se','sp','p','ppv','npv','p.se','p.sp','y.index')) #parameters to monitor

modelUpdate(50000) #more iterations 

#SOME DIAGNOSTICS FIRST
#Check convergence (Trace plots) - should ideally check all
#samplesHistory('se',mfrow=c(1,1)) # plot the chain,
#samplesHistory('sp',mfrow=c(1,1)) # plot the chain,
#samplesHistory('p',mfrow=c(1,1))

#Plot the Gelman-Rubin diagnostic statistics - ratio should be close to 1
#samplesBgr('se',mfrow=c(1,1))  
#samplesBgr('sp',mfrow=c(1,1))
#samplesBgr('p',mfrow=c(1,1))

#Density plots
samplesDensity('se',mfrow=c(1,1)) 
samplesDensity('sp',mfrow=c(1,1))
samplesDensity('p',mfrow=c(1,1))

(results.tb <- samplesStats('*'))

capture.output(results.tb,file='TB_results.txt')

#########################################################################################
#END
